This file may take some time to load at times. Please be patient!
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.7 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(ggcorrplot)
library(htmltools)
library(ggpubr)
source("functions/functions.R")
data <-
readRDS("data/gemeinderatswahlen_alldata.rds")
Create data set used in main RDD analysis.
# similar data set definition as in "Analysis.Rmd"
d_rdd_bivariate <- data %>%
select(ln_gross_expenditure_pc, total_seats, inhabs_rel_to_cutoff, above_cutoff, inhabitants_treshold, year, ags, election_year, state, closest, exact_pop, kreisfreie_stadt) %>%
drop_na(ln_gross_expenditure_pc, total_seats, inhabs_rel_to_cutoff)
data %>%
select(where(is.numeric)) %>%
reshape2::melt(.) %>%
ggplot(., aes(x = value)) +
facet_wrap(~variable, scales = "free") +
geom_histogram()
## No id variables; using all as measure variables
## Warning: attributes are not identical across measure variables; they will be
## dropped
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 8117217 rows containing non-finite values (stat_bin).
ggsave("plots/histograms.svg", width = 45, height = 45, units = "cm")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 8117217 rows containing non-finite values (stat_bin).
data %>%
select(where(is.numeric)) %>%
cor(., use = "pairwise.complete.obs") %>%
ggcorrplot::ggcorrplot(
.,
colors = c("red", "white", "blue"),
#lab = TRUE,
digits = 1
)
## Warning in cor(., use = "pairwise.complete.obs"): the standard deviation is zero
ggsave("plots/bivariate_correlations.svg", width = 45, height = 45, units = "cm")
Define cutoffs for each state, in some cases depending on years (if reforms took place) and whether town is muinicipality (“Gemeinde”) or “district-free city” (“kreisfreie Stadt”).
“th” stands for threshold.
th_1_muni_since_2012 <- c(70, 200, 750, 1250, 2500, 5000, 10000, 15000, 25000, 35000, 45000)
th_1_muni_before_2012 <- c(70, 200, 750, 1250, 2000, 5000, 10000, 15000, 25000, 35000, 45000)
th_1_krefr <- c(150000)
#th_3 <- c(500, 1000, 2000, 3000, 5000, 6000, 7000, 8000, 9000, 10000, 11000, 12000, 15000, 20000, 25000, 30000, 40000, 50000, 75000, 100000, 125000, 150000, 175000, 200000, 250000, 300000, 350000, 400000, 500000, 600000)
th_5 <- c(5000, 8000, 15000, 30000, 50000, 100000, 250000, 400000, 550000, 700000)
th_6 <- c(3000, 5000, 10000, 25000, 50000, 100000, 250000, 500000, 1000000)
th_7 <- c(300, 500, 1000, 2500, 5000, 7500, 10000, 15000, 20000, 30000, 40000, 60000, 80000, 100000, 150000)
th_8 <- c(1000, 2000, 3000, 5000, 10000, 20000, 30000, 50000, 150000, 400000)
th_9 <- c(1000, 2000, 3000, 5000, 10000, 20000, 30000, 50000, 100000, 200000, 500000) # Except Nuremburg and Munich
th_10 <- c(10000, 20000, 30000, 40000, 60000, 100000)
th_12_muni_since_2008 <- c(700, 1500, 2500, 5000, 10000, 15000, 25000, 35000, 45000)
#th_12_muni_before_2008 <- c(200, 700, 1500, 2500, 5000, 10000, 15000, 25000, 35000, 45000)
th_12_krefr <- c(100000, 150000)
th_13 <- c(500, 1000, 1500, 3000, 4500, 6000, 7500, 10000, 20000, 30000, 50000, 75000, 100000, 150000)
th_14 <- c(500, 1000, 2000, 3000, 5000, 10000, 20000, 30000, 40000, 50000, 60000, 80000, 150000, 400000)
th_15_since_2014 <- c(1000, 2000, 3000, 5000, 10000, 20000, 30000, 50000, 150000, 300000)
th_15_before_2014 <- c(100, 500, 1000, 2000, 3000, 5000, 10000, 20000, 30000, 50000, 150000, 300000)
th_16 <- c(500, 1000, 2000, 3000, 5000, 10000, 20000, 30000, 50000, 100000, 200000)
01 Schleswig-Holstein (with “2002” representing the 1998 election): You can clearly see municipalities with 2000-2500 inhabitants “overlapping” in terms of total seats (compare 2018 to prior elections):
data %>%
filter(state == 1 & exact_pop > 1250 & exact_pop <= 5000 & (election_year == 1 | year == 2002)) %>%
ggplot() +
geom_point(aes(x = exact_pop, y = total_seats, colour = as.factor(year)))
12 Brandenburg – population data only for 2019:
data %>%
filter(state == 12 & exact_pop <= 700 & election_year == 1) %>%
ggplot() +
geom_point(aes(x = exact_pop, y = total_seats, colour = as.factor(year)))
15 Saxony-Anhalt (with “2002” representing the 1999 election): After redistricting in 2007, municipalities are larger than before (and far less exist).
data %>%
filter(state == 15 & exact_pop <= 1000 & (election_year == 1 | year == 2002)) %>%
ggplot() +
geom_point(aes(x = exact_pop, y = total_seats, colour = as.factor(year)))
data %>%
filter(state == 15 & exact_pop <= 1000 & (election_year == 1 | year == 2002)) %>%
group_by(year) %>%
summarize(mean = mean(exact_pop), median = median(exact_pop), n = n())
States with no changes in cutoffs and no distinction between municipalities and “district-free cities” only (i. e. 01, 12, 15 excluded). 03 Lower Saxony excluded due to missing data. Vertical bars indicate cutoffs.
plot_list <- htmltools::tagList()
for (s in c(5:10, 13, 14, 16)) {
df_subset <- data %>%
filter(state == s & (election_year == 1 | year == 2002))
correlation <-
cor(df_subset$exact_pop, df_subset$total_seats, use = "pairwise.complete.obs") %>%
round(., 2)
plot <- df_subset %>%
ggplot() +
geom_vline(xintercept = get(paste0("th_", s))) +
geom_point(aes(
x = exact_pop,
y = total_seats,
colour = as.factor(year),
text = paste("town:", town)
)) +
labs(title = paste0("State: ", s, "; linear correlation: ", correlation)) +
scale_x_log10()
plot_list[[s]] <- plotly::ggplotly(plot)
}
## Warning: Ignoring unknown aesthetics: text
## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## Please use `gather()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
## Warning: Ignoring unknown aesthetics: text
## Ignoring unknown aesthetics: text
## Ignoring unknown aesthetics: text
## Ignoring unknown aesthetics: text
## Ignoring unknown aesthetics: text
## Ignoring unknown aesthetics: text
## Ignoring unknown aesthetics: text
## Ignoring unknown aesthetics: text
plot_list
We observe some countries where cutoffs play a huge role (06 Hesse,
07 Rhineland-Palantine, 09 Bavaria, 10 Saarland, 16 Thuringia). In other
states they do not and total_seats spreads more (05 North
Rhine-Westphalia, 08 Baden-Württemberg, 14 Sachsen-Anhalt).
“Special” states:
Red vertical lines indicate a cutoff that did not exist over the entire time period.
# 01 Schleswig-Holstein
df_subset <- data %>%
filter(state == 1 & (election_year == 1 | year == 2002) & kreisfreie_stadt == 0)
correlation <-
cor(df_subset$exact_pop, df_subset$total_seats, use = "pairwise.complete.obs") %>%
round(., 2)
plot <- df_subset %>%
ggplot() +
geom_vline(xintercept = th_1_muni_since_2012) +
geom_vline(xintercept = c(2000, 2500), colour = "red") +
geom_point(aes(
x = exact_pop,
y = total_seats,
colour = as.factor(year),
text = paste("town:", town)
)) +
labs(title = paste0("State: ", 1, "; linear correlation: ", correlation, "; no kreisfreie Städte")) +
scale_x_log10()
## Warning: Ignoring unknown aesthetics: text
plotly::ggplotly(plot)
# 12 Brandenburg
df_subset <- data %>%
filter(state == 12 & (election_year == 1 | year == 2002) & kreisfreie_stadt == 0)
correlation <-
cor(df_subset$exact_pop, df_subset$total_seats, use = "pairwise.complete.obs") %>%
round(., 2)
plot <- df_subset %>%
ggplot() +
geom_vline(xintercept = th_12_muni_since_2008) +
geom_point(aes(
x = exact_pop,
y = total_seats,
colour = as.factor(year),
text = paste("town:", town)
)) +
labs(title = paste0("State: ", 12, "; linear correlation: ", correlation, "; no kreisfreie Städte")) +
scale_x_log10()
## Warning: Ignoring unknown aesthetics: text
plotly::ggplotly(plot)
# 15 Saxony-Anhalt
df_subset <- data %>%
filter(state == 15 & (election_year == 1 | year == 2002))
correlation <-
cor(df_subset$exact_pop, df_subset$total_seats, use = "pairwise.complete.obs") %>%
round(., 2)
plot <- df_subset %>%
ggplot() +
geom_vline(xintercept = th_15_before_2014, colour = "red") +
geom_vline(xintercept = th_15_since_2014) +
geom_point(aes(
x = exact_pop,
y = total_seats,
colour = as.factor(year),
text = paste("town:", town)
)) +
labs(title = paste0("State: ", 15, "; linear correlation: ", correlation)) +
scale_x_log10()
## Warning: Ignoring unknown aesthetics: text
plotly::ggplotly(plot)
12 Brandenburg also has quite a strong relationship but no population data beyond 2019. 15 Saxony-Anhalt may have had some special rules after 2007 county reform (to do: investigate)!
For 01, cutoff change during observation period.
to do: First year shown in plot not yet correct.
# Create data
th_1 <- c(70, 200, 750, 1250, 5000, 10000, 15000, 25000, 35000, 45000)
th_1_change <- c(2000, 2500)
th_1 <- c(70, 200, 750, 1250, 2000, 5000, 10000, 15000, 25000, 35000, 45000)
th_15 <- c(1000, 2000, 3000, 5000, 10000, 20000, 30000, 50000, 150000, 300000)
th_15_change <- c(100, 500)
th_15 <- c(th_15, th_15_change)
plot_list <- list()
plot_df <- d_rdd_bivariate %>%
mutate(
year = case_when(
state == 1 & year == 2002 ~ 1998,
state %in% c(5, 7, 8, 10, 13, 14, 16) & year == 2002 ~ 1999,
state == 6 & year == 2002 ~ 2001,
TRUE ~ year
),
year = as.factor(year)
)
# Plot
for (s in c(1, 5:10, 13, 14, 15, 16)) {
# Specific rules for "kreisfreie Städte" in 01
df_subset <- plot_df
if (s == 1) {
df_subset <- df_subset %>% filter(kreisfreie_stadt == 0)
}
plot_list[[s]] <- df_subset %>%
filter(state == s) %>%
filter((election_year == 1 | as.numeric(year) == min(as.numeric(year)))) %>%
ggplot() +
geom_vline(xintercept = get(paste0("th_", s))) +
geom_point(aes(
x = exact_pop,
y = total_seats#,
#colour = year
),
size = 0.5) +
#labs(title = paste0("State: ", s)) +
scale_x_log10() +
theme(
axis.title.x=element_blank(), # remove x axis label
axis.title.y=element_blank(), # remove y axis label
legend.title=element_blank() # remove legend title
)
# Cutoff changes
# if (s == 1 | s == 15) {
# plot_list[[s]] <- plot_list[[s]] +
# geom_vline(xintercept = get(paste0("th_", s, "_change")),
# colour = "#F8766D", linetype = "dashed")
# }
# Show plots
#print(plot_list[[s]])
}
rm(df_subset, th_1, th_1_change, th_15, th_15_change)
Arrange plots:
first_stage <- ggpubr::ggarrange(
plot_list[[1]], plot_list[[5]], plot_list[[6]], plot_list[[7]], plot_list[[8]],
plot_list[[9]], plot_list[[10]], plot_list[[13]], plot_list[[14]], plot_list[[15]], plot_list[[16]],
# labels = c("SH", "NW", "Hesse", "Rhineland-Palatinate", "Baden-Württemberg",
# "Bavaria", "Saarland", "Mecklenburg-Vorpommern", "Saxony", "Thuringia"),
labels = paste(" ", c("SH", "NW", "HE", "RP", "BW", "BY", "SL", "MV", "SN", "ST", "TH")),
#label.x = "Population",
#label.y = "Council size",
#common.legend = TRUE,
ncol = 2, nrow = 6) %>%
annotate_figure(
# See https://github.com/kassambara/ggpubr/issues/78
left = text_grob("Council size", rot = 90),
bottom = text_grob("Population")
)
first_stage
Export plots:
ggpubr::ggexport(first_stage, filename = "plots/rdd_first_stage.pdf")
## file saved to plots/rdd_first_stage.pdf
Expected is what I deemed the size of the local parliament based on laws and ignoring voluntary reductions as well as “overhang seats”
df_subset <- data %>%
filter((election_year == 1 | year == 2002))
correlation <-
cor(df_subset$by_law_seats, df_subset$total_seats, use = "pairwise.complete.obs") %>%
round(., 2)
plot <- df_subset %>%
ggplot() +
#geom_vline(xintercept = th_1_muni_since_2012) +
#geom_vline(xintercept = c(2000, 2500), colour = "red") +
geom_point(aes(
x = by_law_seats,
y = total_seats,
colour = as.factor(state),
text = paste("town:", town, "<br>year:", year)
)) +
labs(title = paste0("All states", "; linear correlation: ", correlation))
## Warning: Ignoring unknown aesthetics: text
plotly::ggplotly(plot)
Linear difference:
df_subset <- data %>%
filter(election_year == 1 | year == 2002)
correlation <-
cor(df_subset$exact_pop, df_subset$total_seats, use = "pairwise.complete.obs") %>%
round(., 2)
plot <- df_subset %>%
ggplot() +
geom_vline(xintercept = 0) +
geom_point(aes(
x = diff_to_cutoff,
y = total_seats,
colour = as.factor(state),
#shape = as.factor(state),
text = paste("town:", town, "<br>year:", year)
)) +
labs(title = paste0("All states; linear correlation: ", correlation))
## Warning: Ignoring unknown aesthetics: text
plotly::ggplotly(plot)
Using the Egger and Koethenbuerger (2010) calculation steps: \[\tilde{N_i} = N_i/N_d\] with \(N_i\) as the relevant actual population size and \(N_d\) as the respective thresholds.
df_subset <- data %>%
filter(election_year == 1 | year == 2002)
correlation <-
cor(df_subset$inhabs_rel_to_cutoff, df_subset$total_seats, use = "pairwise.complete.obs") %>%
round(., 2)
plot <- df_subset %>%
ggplot() +
geom_vline(xintercept = 0) +
geom_point(aes(
x = inhabs_rel_to_cutoff,
y = total_seats,
colour = as.factor(state),
#shape = as.factor(state),
text = paste("town:", town, "<br>year:", year)
)) +
labs(title = paste0("All states; linear correlation: ", correlation))
## Warning: Ignoring unknown aesthetics: text
plotly::ggplotly(plot)
Relationship between inhabitants and public spending for those states that did not change cutoffs:
plot_list <- htmltools::tagList()
for (s in c(5:10, 13, 14, 16)) {
df_subset <- data %>%
filter(state == s & (election_year == 1 | year == 2002))
correlation <-
cor(df_subset$exact_pop, df_subset$ln_gross_expenditure_pc, use = "pairwise.complete.obs") %>%
round(., 2)
plot <- df_subset %>%
ggplot() +
geom_vline(xintercept = get(paste0("th_", s))) +
geom_point(aes(
x = exact_pop,
y = ln_gross_expenditure_pc,
colour = as.factor(year),
text = paste("town:", town)
)) +
labs(title = paste0("State: ", s, "; linear correlation: ", correlation)) +
scale_x_log10() +
scale_y_log10()
plot_list[[s]] <- plotly::ggplotly(plot)
}
## Warning: Ignoring unknown aesthetics: text
## Ignoring unknown aesthetics: text
## Ignoring unknown aesthetics: text
## Ignoring unknown aesthetics: text
## Ignoring unknown aesthetics: text
## Ignoring unknown aesthetics: text
## Ignoring unknown aesthetics: text
## Ignoring unknown aesthetics: text
## Ignoring unknown aesthetics: text
plot_list
rm(s, plot_list, plot, df_subset)
Relationship between inhabitants and public spending per capita for those states that did not change cutoffs:
plot_list <- htmltools::tagList()
for (s in c(5:10, 13, 14, 16)) {
df_subset <- data %>%
filter(state == s & (election_year == 1 | year == 2002))
correlation <-
cor(df_subset$exact_pop, df_subset$ln_gross_expenditure_pc, use = "pairwise.complete.obs") %>%
round(., 2)
plot <- df_subset %>%
ggplot() +
geom_vline(xintercept = get(paste0("th_", s))) +
geom_point(aes(
x = exact_pop,
y = ln_gross_expenditure_pc,
colour = as.factor(year),
text = paste("town:", town)
)) +
labs(title = paste0("State: ", s, "; linear correlation: ", correlation)) +
scale_x_log10()
plot_list[[s]] <- plotly::ggplotly(plot)
}
## Warning: Ignoring unknown aesthetics: text
## Ignoring unknown aesthetics: text
## Ignoring unknown aesthetics: text
## Ignoring unknown aesthetics: text
## Ignoring unknown aesthetics: text
## Ignoring unknown aesthetics: text
## Ignoring unknown aesthetics: text
## Ignoring unknown aesthetics: text
## Ignoring unknown aesthetics: text
plot_list
rm(s, plot_list, plot, df_subset)